library(dtwclust)
library(dtw)
df<-read.csv("../clean_data/score_list_year.csv")
df
df <- subset(df, select = -c(X) )
df
#colnames(df)[colnames(df) == "X2000"] = "2000"
colnames(df)[colnames(df) == "X2003"] = "2003"
colnames(df)[colnames(df) == "X2005"] = "2005"
colnames(df)[colnames(df) == "X2007"] = "2007"
colnames(df)[colnames(df) == "X2009"] = "2009"
colnames(df)[colnames(df) == "X2011"] = "2011"
colnames(df)[colnames(df) == "X2013"] = "2013"
colnames(df)[colnames(df) == "X2015"] = "2015"
colnames(df)[colnames(df) == "X2017"] = "2017"
colnames(df)[colnames(df) == "X2019"] = "2019"
colnames(df)[colnames(df) == "X2022"] = "2022"
df
jurisdiction = df[['Jurisdiction']]
dtw_df <- df[, -1]
head(dtw_df)
df_lst <- tslist(dtw_df)
remove_nan <- function(ts) {
ts[!is.na(ts)]
}
# Apply the function to each time series in the list
df_lst <- lapply(df_lst, remove_nan)
head(df_lst)
$`1`
[1] 230 230 232 231 233 231 228 229 225 223
$`2`
[1] 226 232 230 236 236 236 237 237 236 233
$`3`
[1] 232 238 234 238 240 235 230 232 230 229
$`4`
[1] 228 233 234 235 240 238 238 238 236 229
$`5`
[1] 230 235 232 232 234 234 232 230 230 227
$`6`
[1] 236 242 241 242 247 244 243 240 239 235
df_cvi <- list()
for (i in 2:10){
df_clust <- tsclust(df_lst, type = "partitional", k = i, distance = "dtw_basic", centroid = "pam")
df_metric <- cvi(df_clust, type = "valid", log.base = 10)
df_cvi <- append(df_cvi, list(df_metric))
}
df_cvi_ma <- do.call(rbind, df_cvi)
rw <- c("K2","K3","K4","K5","K6","K7","K8","K9","K10")
rownames(df_cvi_ma) <- rw
print(df_cvi_ma)
Sil SF CH DB DBstar D COP
K2 0.5327527 0 43.28736 0.9210059 0.9210059 0.08888889 0.23987430
K3 0.2942881 0 30.24129 0.6832054 0.6832054 0.12195122 0.18446804
K4 0.1772767 0 29.91308 1.2875794 1.6104886 0.11000000 0.16877581
K5 0.2171182 0 18.61665 1.1743895 1.3116778 0.13513514 0.14773095
K6 0.2654170 0 20.13447 1.3773396 1.4549548 0.20338983 0.12758693
K7 0.2201177 0 15.77805 1.3722113 1.6611368 0.20754717 0.11927243
K8 0.1617864 0 13.75119 1.4135552 1.7576559 0.19298246 0.11947762
K9 0.1726475 0 13.39630 1.3037561 1.6218060 0.25000000 0.10074319
K10 0.1708211 0 12.97086 1.1688558 1.3140153 0.22727273 0.09627984
– “Sil” (!): Silhouette index (Rousseeuw (1987); to be maximized).-K4 – “SF” (~): Score Function (Saitta et al. (2007); to be maximized; see notes). – “CH” (~): Calinski-Harabasz index (Arbelaitz et al. (2013); to be maximized).-k3 – “DB” (?): Davies-Bouldin index (Arbelaitz et al. (2013); to be minimized).k4 – “DBstar” (?): Modified Davies-Bouldin index (DB*) (Kim and Ramakrishna (2005); to be minimized). -k4 – “D” (!): Dunn index (Arbelaitz et al. (2013); to be maximized). k5 – “COP” (!): COP index (Arbelaitz et al. (2013); to be minimized). k9
# different seeds
df_cvi2 <- list()
for (i in 1:100){
df_clust2 <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw_basic", centroid = "pam", seed=i)
df_metric2 <- cvi(df_clust2, type = "valid", log.base = 10)
df_cvi2 <- append(df_cvi2, list(df_metric2))
}
df_cvi_ma2 <- do.call(rbind, df_cvi2)
rw2 <- as.character(seq(1, 100))
rownames(df_cvi_ma2) <- rw2
print(df_cvi_ma2)
Sil SF CH DB DBstar D COP
1 0.1874888 0 18.25596 1.3053106 1.8287413 0.08396947 0.1917782
2 0.3571977 0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855
3 0.3024563 0 26.41041 1.2689437 1.4516487 0.14705882 0.1637939
4 0.1812487 0 26.39508 1.6927962 2.0586129 0.10000000 0.1595358
5 0.1923599 0 19.30357 1.2095514 1.6587284 0.08396947 0.1921932
6 0.2507965 0 19.95531 1.3104726 1.7536605 0.08148148 0.2070441
7 0.3744142 0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
8 0.2142639 0 22.92645 1.3688596 1.4739257 0.14864865 0.1638778
9 0.2311142 0 23.73130 1.1148819 1.1888411 0.13333333 0.1651468
10 0.3765237 0 29.30090 1.0328111 1.0915961 0.20338983 0.1482244
11 0.3571977 0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855
12 0.2993477 0 26.54974 1.2566468 1.4449901 0.14705882 0.1635625
13 0.2830807 0 19.19311 1.3461046 1.8551322 0.08888889 0.2064483
14 0.2061031 0 18.53913 1.3828375 1.8650863 0.10714286 0.1775404
15 0.1440264 0 20.38992 1.8710270 2.1120158 0.09803922 0.1776847
16 0.2061031 0 18.53913 1.3828375 1.8650863 0.10714286 0.1775404
17 0.3765237 0 29.30090 1.0328111 1.0915961 0.20338983 0.1482244
18 0.2756091 0 20.92445 1.4813641 1.5843049 0.08888889 0.1952426
19 0.3765237 0 29.30090 1.0328111 1.0915961 0.20338983 0.1482244
20 0.3024563 0 26.41041 1.2689437 1.4516487 0.14705882 0.1637939
21 0.1332506 0 15.41930 1.4563920 1.5887485 0.08888889 0.2194493
22 0.1975725 0 21.11861 1.3645201 1.5256517 0.13750000 0.1576086
23 0.3073942 0 26.64271 0.8748602 0.9951907 0.14705882 0.1671014
24 0.2993477 0 26.54974 1.2566468 1.4449901 0.14705882 0.1635625
25 0.2556192 0 20.88889 1.8150376 2.0616855 0.08888889 0.1917037
26 0.3744142 0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
27 0.2643667 0 19.45326 1.4257934 1.8834036 0.08888889 0.2052309
28 0.3024563 0 26.41041 1.2689437 1.4516487 0.14705882 0.1637939
29 0.2359621 0 17.80887 1.0671383 1.3250603 0.08928571 0.1846214
30 0.2643303 0 23.68913 1.5044179 1.6082708 0.14634146 0.1623024
31 0.2198519 0 18.46799 1.3344687 1.8283955 0.08148148 0.2023604
32 0.1923599 0 19.30357 1.2095514 1.6587284 0.08396947 0.1921932
33 0.2909841 0 22.44752 1.0798717 1.2153745 0.14492754 0.1547814
34 0.2611797 0 24.10611 1.5499854 1.5889159 0.10989011 0.1684780
35 0.3907959 0 24.86772 1.0765682 1.2718168 0.14634146 0.1685981
36 0.2634083 0 22.70326 1.2548293 1.3077921 0.13513514 0.1621190
37 0.1680235 0 18.82338 1.2568437 1.7326249 0.08148148 0.2077470
38 0.2403821 0 21.53393 1.3588650 1.4736465 0.13513514 0.1677886
39 0.2379958 0 23.99054 1.2376829 1.3529575 0.13513514 0.1704521
40 0.3744142 0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
41 0.2302802 0 17.49444 1.2092021 1.5823800 0.07633588 0.1926561
42 0.2102223 0 18.20317 1.2852829 1.5978954 0.11764706 0.1790028
43 0.2710659 0 22.93085 1.6051506 1.6544439 0.14285714 0.1740123
44 0.2143036 0 26.28056 1.1814812 1.2643829 0.11842105 0.1468198
45 0.3076958 0 24.30530 1.0263258 1.1141560 0.14492754 0.1529607
46 0.2284966 0 21.51658 1.7707353 1.8736810 0.14285714 0.1662717
47 0.2798071 0 22.39459 1.2381613 1.4589752 0.14492754 0.1512425
48 0.1955145 0 28.86993 1.3117774 1.6050695 0.10000000 0.1582702
49 0.3073942 0 26.64271 0.8748602 0.9951907 0.14705882 0.1671014
50 0.3256208 0 31.14627 0.9078543 0.9346023 0.18055556 0.1547116
51 0.2798071 0 22.39459 1.2381613 1.4589752 0.14492754 0.1512425
52 0.1941546 0 14.12701 2.5014473 2.8671010 0.08396947 0.2068573
53 0.3765237 0 29.30090 1.0328111 1.0915961 0.20338983 0.1482244
54 0.2233182 0 26.66568 1.1781803 1.2631824 0.11842105 0.1467950
55 0.2993477 0 26.54974 1.2566468 1.4449901 0.14705882 0.1635625
56 0.1911848 0 23.52348 1.8284677 1.9798721 0.12195122 0.1617180
57 0.2128360 0 22.79792 1.9260579 2.0171403 0.13414634 0.1536275
58 0.2168573 0 18.38749 1.4443697 2.0629950 0.08148148 0.1996360
59 0.2823329 0 19.36654 1.2757070 1.7666756 0.08888889 0.2064461
60 0.3744142 0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
61 0.2993477 0 26.54974 1.2566468 1.4449901 0.14705882 0.1635625
62 0.3571977 0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855
63 0.1354536 0 14.17592 1.9028523 2.3671551 0.08928571 0.1945729
64 0.3073942 0 26.64271 0.8748602 0.9951907 0.14705882 0.1671014
65 0.2909841 0 22.44752 1.0798717 1.2153745 0.14492754 0.1547814
66 0.3747658 0 30.45188 0.6940726 0.7647676 0.18644068 0.1538688
67 0.1900115 0 18.98990 1.1484432 1.1689030 0.13513514 0.1558790
68 0.1170078 0 18.68287 1.1717777 1.6333408 0.07633588 0.2021902
69 0.3571977 0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855
70 0.2253970 0 21.77116 1.6244867 1.7485403 0.10989011 0.1609345
71 0.2628665 0 20.31148 1.4623198 1.5763948 0.15873016 0.1568197
72 0.2879393 0 20.40829 0.9871044 1.4338029 0.08888889 0.2050737
73 0.3612311 0 23.06653 1.3378768 1.4198228 0.14634146 0.1673015
74 0.3744142 0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
75 0.2456433 0 22.39671 1.3276921 1.4104741 0.12162162 0.1605648
76 0.2078312 0 18.82915 1.6932078 2.2658465 0.08396947 0.1958815
77 0.3744142 0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
78 0.2382307 0 20.72286 1.7611842 2.0391447 0.08888889 0.1929157
79 0.2094742 0 23.02502 0.9644303 1.0667018 0.12195122 0.1645362
80 0.1932313 0 18.46895 1.3260993 1.7516665 0.08928571 0.1809625
81 0.2403821 0 21.53393 1.3588650 1.4736465 0.13513514 0.1677886
82 0.2118094 0 22.29318 0.9943062 1.1051276 0.11764706 0.1815512
83 0.3801553 0 24.84512 1.2020464 1.2857002 0.13414634 0.1644943
84 0.3765237 0 29.30090 1.0328111 1.0915961 0.20338983 0.1482244
85 0.2904063 0 24.23261 1.1792735 1.3501068 0.14492754 0.1494218
86 0.3024563 0 26.41041 1.2689437 1.4516487 0.14705882 0.1637939
87 0.2756091 0 20.92445 1.4813641 1.5843049 0.08888889 0.1952426
88 0.1984354 0 29.00511 1.3175280 1.6084602 0.10000000 0.1584127
89 0.3612311 0 23.06653 1.3378768 1.4198228 0.14634146 0.1673015
90 0.2391533 0 20.68884 1.8402949 1.9491779 0.13513514 0.1618730
91 0.2750848 0 22.27727 1.2633329 1.3011376 0.13513514 0.1618074
92 0.2634083 0 22.70326 1.2548293 1.3077921 0.13513514 0.1621190
93 0.3582754 0 28.67233 0.6869252 0.7535149 0.13750000 0.1549651
94 0.3571977 0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855
95 0.2094742 0 23.02502 0.9644303 1.0667018 0.12195122 0.1645362
96 0.3546810 0 25.16784 1.0903469 1.1321172 0.10975610 0.1633902
97 0.3656055 0 24.86673 1.1657459 1.2349088 0.14634146 0.1654051
98 0.1932916 0 17.70319 1.3395919 1.8360966 0.08396947 0.1921884
99 0.3744142 0 27.17061 0.9968825 1.0471875 0.14634146 0.1582313
100 0.3571977 0 29.17184 1.3977346 1.4811949 0.21428571 0.1446855
for (i in 2:10){df_clust_opt <- tsclust(df_lst, type = "partitional", k = i, distance = "dtw", centroid = "pam",seed = 700)
plot(df_clust_opt)}
for (i in 1:10){df_clust_opt_seed <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = i)
plot(df_clust_opt_seed)}
for (i in 11:20){df_clust_opt_seed <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = i)
plot(df_clust_opt_seed)}
for (i in 21:30){df_clust_opt_seed <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = i)
plot(df_clust_opt_seed)}
#k4
df_clust_opt_final <- tsclust(df_lst, type = "partitional", k = 4, distance = "dtw", centroid = "pam",seed = 700)
plot(df_clust_opt_final)
# Extract cluster assignments
cluster_assignments <- df_clust_opt_final@cluster
# Determine the number of clusters
num_clusters <- max(cluster_assignments)
# Loop through each cluster and print the jurisdictions in it
for (cluster_number in 1:num_clusters) {
cat("Jurisdictions in Cluster", cluster_number, ":\n")
# Find the indices of jurisdictions in this cluster
indices_in_cluster <- which(cluster_assignments == cluster_number)
# Print the jurisdictions corresponding to these indices
print(jurisdiction[indices_in_cluster])
cat("\n") # Add a newline for readability
}
Jurisdictions in Cluster 1 :
[1] "Alaska" "Delaware" "Illinois" "Michigan" "New York" "Oklahoma" "Oregon" "South Carolina"
[9] "West Virginia"
Jurisdictions in Cluster 2 :
[1] "Connecticut" "Florida" "Indiana" "Iowa" "Massachusetts" "Minnesota" "Montana" "Nebraska"
[9] "New Hampshire" "New Jersey" "North Dakota" "Pennsylvania" "South Dakota" "Texas" "Utah" "Virginia"
[17] "Wisconsin" "Wyoming"
Jurisdictions in Cluster 3 :
[1] "Alabama" "Arizona" "Arkansas" "California" "Georgia" "Hawaii" "Kentucky" "Louisiana" "Mississippi"
[10] "Missouri" "Nevada" "New Mexico" "Rhode Island" "Tennessee"
Jurisdictions in Cluster 4 :
[1] "Colorado" "Idaho" "Kansas" "Maine" "Maryland" "National" "North Carolina" "Ohio"
[9] "Vermont" "Washington"
# Load necessary libraries
library(ggplot2)
library(reshape2)
# Loop through each cluster
for (cluster_number in 1:num_clusters) {
cat("Plotting jurisdictions in Cluster", cluster_number, ":\n")
# Find the indices of jurisdictions in this cluster
indices_in_cluster <- which(cluster_assignments == cluster_number)
# Get the names of the jurisdictions in this cluster
jurisdictions_in_cluster <- jurisdiction[indices_in_cluster]
# Filter the gap_list_year data frame for these jurisdictions
cluster_data <- df[df$Jurisdiction %in% jurisdictions_in_cluster, ]
# Convert the data to long format for ggplot
long_df <- melt(cluster_data, id.vars = "Jurisdiction", variable.name = "Year", value.name = "Value")
# Plot
p <- ggplot(long_df, aes(x = Year, y = Value, group = Jurisdiction, color = Jurisdiction)) +
geom_line() +
labs(title = paste("Cluster", cluster_number), x = "Year", y = "Scale Score") +
theme(legend.position = "right")
print(p)
#ggsave(paste("cluster_", cluster_number, ".png", sep=""), plot = p)
}
Plotting jurisdictions in Cluster 1 :
Plotting jurisdictions in Cluster 2 :
Plotting jurisdictions in Cluster 3 :
Plotting jurisdictions in Cluster 4 :